*----------------------------------------------------------------------------------------------------------	* 
* RESEARCHERS:		EARN												   									*
* PROGRAMMED BY:	Linea Hasager																			*
* DESCRIPTION:					 																			*
*					1) Load data and create variables														*
*					2) Apply sample selection criteria														*
*					3) Variable definitions and labels														*
*					4) Output analysis data set																*
*																											*
* CREATED:			May 18, 2022																		   	*
* LAST MODIFIED:	May 31, 2022														       				*
*----------------------------------------------------------------------------------------------------------	*

* Session setup: disable output paging and start from an empty data set
set more off
clear

* Project data directory (the population file is read from here and the analysis sample is written here)
global in "L:\Workdata\707455\Papers\ABFHP_1999reform\Code\ReStat\Input"



*--------------------------------------------------------------------------------------------------------	


*--------------------------------------------------------------------------------------------------------	
* 1) LOAD DATA
*--------------------------------------------------------------------------------------------------------	

* Load the population file. -use- accepts -clear- (not -replace-) to discard
* whatever is currently in memory before loading.
use "${in}\Population.dta", clear



*--------------------------------------------------------------------------------------------------------	
* 2) SAMPLE RESTRICTIONS
*--------------------------------------------------------------------------------------------------------	

* Restrict to individuals aged 49 or younger, so that they can be followed at
* working age for 18 years post reform. Missing ages compare as larger than any
* number in Stata, so observations with missing age are removed as well.
drop if age >= 50


* Origin-country indicators (0/1) built from the origin-country code opr_land.
* A logical expression evaluates to 0 when opr_land is missing, so the
* indicators are never missing.
gen iraq = (opr_land == 5436)
label variable iraq "Iraq"

gen somalia = (opr_land == 5289)
label variable somalia "Somalia"

gen afghanistan = (opr_land == 5404)
label variable afghanistan "Afghanistan"

* Residual category: 1 when none of the three indicators above is set
gen other_country = !(somalia == 1 | iraq == 1 | afghanistan == 1)
label variable other_country "Other Country"


* Drop Somalis due to irregularities in asylum processing. The indicator is
* constant afterwards and is therefore removed.
drop if somalia != 0
drop somalia


*--------------------------------------------------------------------------------------------------------	
* 3) VARIABLE DEFINITIONS AND LABELS
*--------------------------------------------------------------------------------------------------------	

* Urban vs. rural municipality of first residence.
* Definition from the Danish Economic Councils (spring 2015): the capital area
* and municipalities with towns larger than 45,000 inhabitants.
* urban is 1 when first_municipality is one of the codes listed, 0 otherwise
* (including when first_municipality is missing).
gen urban = inlist(first_municipality, ///
    101, 147, 151, 153, 157, 159, 161, 163, 165, 167, ///
    173, 175, 183, 185, 187, 461, 561, 751, 851, 730, ///
    731, 621, 615, 630, 631, 265, 657, 253, 217, 817, ///
    831, 837, 557, 571, 651, 677, 685, 601, 609, 629, ///
    623, 509, 717, 723, 729, 747, 719, 255, 263, 603, ///
    605, 611, 617, 627)

label variable urban "Placed in Urban Municipality"


* Recode old municipality codes to new ones so that moving probabilities are
* comparable across years, then build yearly indicators for having moved.

* Years 1-7: translate the old code through the municipality key file.
* Only master and matched observations are kept; key rows without a match in
* the data are not added.
forvalues yr = 1/7 {
    gen gl_kommune = kom_`yr'
    merge m:1 gl_kommune using "$in\municipalitykey.dta", keep(master match) nogenerate
    rename ny_kommune kom_new_`yr'
    label variable kom_new_`yr' "Municipality in year `yr'"
    drop gl_kommune
}

* Years 8-18: the code is copied unchanged
forvalues yr = 8/18 {
    gen kom_new_`yr' = kom_`yr'
    label variable kom_new_`yr' "Municipality in year `yr'"
}

* Move indicator: 1 when the recoded municipality is not system-missing and
* differs from the first municipality, 0 otherwise
forvalues yr = 1/18 {
    gen moved_new_`yr' = (first_municipality != kom_new_`yr' & kom_new_`yr' != .)
    label variable moved_new_`yr' "Moved in year `yr'"
}

* Year-1 move indicator is set to missing (not zero) when year(dato) is after 1999
replace moved_new_1 = . if year(dato) > 1999

* Remove all yearly municipality variables, old and recoded
drop kom*


* Indicators for the Danish language learning track at first placement:
* danishf1-danishf3 equal 1 when level_first is 1, 2 or 3 respectively, else 0
forvalues track = 1/3 {
    gen danishf`track' = (level_first == `track')
    label variable danishf`track' "Danish `track'"
}


* Timing variables derived from the permit date tilladelsesdato, with day and
* month counters normalised so that January 1999 equals 0.
* tilladelsesdato is passed straight to mofd() and wofd() below, i.e. it is
* treated as a daily date, so day arithmetic can be done on it directly.

* DAY: days since January 1st 1999.
* Stata has no dofd() function (the call stops the do-file with an
* unknown-function error); the daily date is used as it is.
gen day_nr = tilladelsesdato - td(01jan1999)
label variable day_nr "Days Since January 1st 1999"


* MONTH: calendar month of the permit, displayed as e.g. Jan 1999
gen month_nr = mofd(tilladelsesdato)
format %tmMon_CCYY month_nr
label variable month_nr "Month"


* MONTH COUNTER: months since January 1999; tm(1999m1) equals 468
gen m_nr = mofd(tilladelsesdato) - tm(1999m1)
label variable m_nr "Months Since January 1st 1999"



* WEEK: calendar week of the permit
gen week_nr = wofd(tilladelsesdato)
format %tw week_nr
label variable week_nr "Week"

* Number of observations with non-missing pnr in each week bin
bysort week_nr: egen antalw = count(pnr)
label variable antalw "Permits per Week"


* Indicator for family reunification to a refugee (ref_type equal to 2)
gen familyref = (ref_type == 2)
label variable familyref "Family-Reunified to Refugee"



* Placed in one of the five largest cities: Copenhagen incl. Frederiksberg,
* Aarhus, Aalborg, Odense and Esbjerg (six municipality codes in total)
gen top5 = inlist(first_municipality, 101, 147, 751, 851, 461, 561)
label variable top5 "Placed in Five Largest Cities"



* Indicator for a mother tongue written in the Latin alphabet. It is 1 when
* any of the language indicators listed below equals 1, and 0 otherwise
* (other alphabets and unknown language).
local latin_languages albanian berber bosnian english estonian french indonesian kirundi kurdish latvian luganda polish portugese romani romanian serbian slovakian somali spanish tagalog turkish hungarian vietnamese wolof

gen latin_a = 0
foreach tongue of local latin_languages {
    replace latin_a = 1 if `tongue' == 1
}
label variable latin_a "Mother Tongue Latin Alphabet"




* Age group indicator: younger than 25 and without dependents
* (both kids02 and kids018 equal to zero)
gen age25 = (age < 25 & kids02 == 0 & kids018 == 0)
label variable age25 "Under 25yo and No Dependents"





* Descriptive check only: tabulate the 2-digit ISCO 88 codes pooled over
* years 1-18 to see which are most frequent. Everything happens between
* preserve and restore, so the data in memory are unchanged afterwards.
preserve
    * Truncate the 4-digit code to its first two digits
    forvalues yr = 1/18 {
        gen isco2d_`yr' = int(isco4d_`yr' / 100)
    }
    keep pnr year_admission isco2d*
    * One row per person and year
    reshape long isco2d_, i(pnr) j(t)
    tabulate isco2d_
restore


* Average cumulative employment in complex and non-complex jobs: number of
* years in the job type divided by years since migration, built recursively as
*   y_t = (y_(t-1) * (t-1) + x_t) / t
foreach job in complex_job non_complex_job {
    * Seed the recursion with the year-1 value, using 0 when it is missing
    gen `job'_y1 = cond(missing(`job'_1), 0, `job'_1)
    label variable `job'_y1 "Average Cumulative Years Employed in `job' Year 1"

    forvalues yr = 2/18 {
        local prev = `yr' - 1
        gen `job'_y`yr' = (`job'_y`prev' * (`prev') + `job'_`yr') / `yr'
        label variable `job'_y`yr' "Average Cumulative Years Employed in `job' Year `yr'"
    }

    * Restore the raw year-1 value, missing values included
    replace `job'_y1 = `job'_1
}

* For non-complex jobs only the year-18 average is kept
drop non_complex_job_y1-non_complex_job_y17



* Average cumulative crime: number of years with a conviction divided by years
* since migration, built recursively as y_t = (y_(t-1) * (t-1) + x_t) / t

* Seed the recursion with the year-1 value, using 0 when it is missing
gen convicted_y1 = cond(missing(convicted1), 0, convicted1)
label variable convicted_y1 "Avg. Cumulative Years convicted of Crime 1 Year"

forvalues yr = 2/18 {
    local prev = `yr' - 1
    gen convicted_y`yr' = (convicted_y`prev' * (`prev') + convicted`yr') / `yr'
    label variable convicted_y`yr' "Avg. Cumulative Years convicted of Crime `yr' Years"
}

* Restore the raw year-1 value, missing values included
replace convicted_y1 = convicted1

* Only the year-18 average is kept
drop convicted_y1-convicted_y17



* Crime other than shoplifting in years 1-2, computed as the overall indicator
* minus the shoplifting indicator.
* NOTE(review): the difference is a 0/1 dummy only if the inputs are 0/1 and
* shoplifting never occurs without the overall indicator being set - confirm.
forvalues yr = 1/2 {
    gen charged_other`yr' = charged`yr' - property_sm_krsi`yr'
    gen convicted_other`yr' = convicted`yr' - property_sm_kraf`yr'

    label variable convicted_other`yr' "Convicted of Other Crime than Shoplifting Year `yr'"
    label variable charged_other`yr' "Charged with Other Crime than Shoplifting Year `yr'"
    label variable charged`yr' "Charged with Crime Year `yr'"
    label variable property_sm_krsi`yr' "Charged with Shoplifting Year `yr'"
    label variable property_sm_kraf`yr' "Convicted of Shoplifting Year `yr'"
}

* Labels for the yearly conviction indicators (label text typo "Covicted" corrected)
forvalues yr = 1/18 {
    label variable convicted`yr' "Convicted of Crime Year `yr'"
}


* Variable labels for the yearly cumulative earnings measures, years 1-18
forvalues yr = 1/18 {
    label variable loenmv_13_y_`yr'_sum "Average Cumulative Earnings Year `yr'"
}

* Permit type indicators derived from the permit code forklar
gen quota = (forklar == 28)
gen convention = inlist(forklar, 23, 27)
* Residual category: neither quota, convention nor family-reunified
gen other_permit = (quota == 0 & convention == 0 & familyref == 0)

label variable quota "Quota Refugee"
label variable convention "Refugee Under Geneva Convention"
label variable other_permit "Other Refugee Permit"


* Marital status indicators. married is 1 when civst equals 11, else 0.
gen married = 0
replace married = 1 if civst == 11
label variable married "Married"

* unmarried is the complement of married. The condition previously read
* married==1, which made unmarried an exact copy of married.
* NOTE(review): observations with missing civst have married equal to 0 and
* are therefore counted as unmarried - confirm that this is intended.
gen unmarried = 0
replace unmarried = 1 if married == 0
label variable unmarried "Unmarried"


* Quadratic age term: age multiplied by itself
gen age_sq = age * age
label variable age_sq "Age Squared"




* Integration-service and employment-service indicators at the 12, 18, 24, 30
* and 36 month horizons: system-missing values are recoded to 0. Only the
* employment-service indicators receive a label here.
foreach horizon in 12 18 24 30 36 {
    foreach service in intser emplser {
        replace `service'_`horizon'_d = 0 if `service'_`horizon'_d == .
    }
    label variable emplser_`horizon'_d "In Employment Support within `horizon' Months"
}


* Indicator for being in either integration or employment services within each
* horizon: 1 when at least one of the two service indicators equals 1
foreach horizon in 12 18 24 30 36 {
    gen amfo_`horizon'_d = (emplser_`horizon'_d == 1 | intser_`horizon'_d == 1)
    label variable amfo_`horizon'_d "In Integration or Employment Services Within `horizon' Months"
}



* Indicator for participation in ALMP or language training within each
* horizon: 1 when the employment-service indicator or in_dkdb equals 1
foreach horizon in 12 18 24 30 36 {
    gen amfo_dkdb_`horizon' = (emplser_`horizon'_d == 1 | in_dkdb_`horizon' == 1)
    label variable amfo_dkdb_`horizon' "Participating in employment service or language training within `horizon' months"
}


* Hours in ALMP: system-missing hours are recoded to 0 for both the
* employment-support and the assigned-hours measures at every horizon.
* Only the employment-support hours receive a label here.
foreach horizon in 12 18 24 30 36 {
    foreach measure in hours_emplser hours_assigned {
        replace `measure'_`horizon' = 0 if `measure'_`horizon' == .
    }
    label variable hours_emplser_`horizon' "Hours in Employment Support within `horizon' Months"
}


* Set hours and participation to missing when the individual cannot be
* observed in DKDB yet (DKDB starts in January 1999): for horizon h, this is
* applied when the permit month is more than h months before January 1999.
* NOTE(review): only the 12, 18 and 24 month horizons are handled; the 30 and
* 36 month variables and the amfo_*_d indicators are left untouched - confirm.
foreach horizon in 12 18 24 {
    foreach measure in hours_assigned_`horizon' hours_emplser_`horizon' emplser_`horizon'_d amfo_dkdb_`horizon' in_dkdb_`horizon' {
        replace `measure' = . if m_nr < -`horizon'
    }
}


* Drop variables that are not needed in the analysis sample.

* Three positional ranges; which variables they cover depends on the variable
* order in the data set (presumably the mother-tongue indicators - verify)
drop afghan-amharic armenian-bulgarian english-wolof

* Raw source variables that the derived indicators were built from
drop forklar opr_land civst ref_type level_first dato

* All integration-service variables
drop intser*


*--------------------------------------------------------------------------------------------------------	
* 4) OUTPUT ANALYSIS DATA
*--------------------------------------------------------------------------------------------------------	


* Save the analysis data set into the project data directory, overwriting any
* existing file of the same name. No extension is given, so Stata appends .dta.
save "$in\Analysis_sample", replace
